# Import required R libraries
library(fpp3)

Exercise 9.1

Figure 9.32 shows the ACFs for 36 random numbers, 360 random numbers and 1,000 random numbers.

Section a

Explain the differences among these figures. Do they all indicate that the data are white noise?

Answer: XXX

Section b

Why are the critical values at different distances from the mean of zero? Why are the autocorrelations different in each figure when they each refer to white noise?

Answer: XXX

Exercise 9.2

A classic example of a non-stationary series are stock prices. Plot the daily closing prices for Amazon stock (contained in gafa_stock), along with the ACF and PACF. Explain how each plot shows that the series is non-stationary and should be differenced.

gafa_stock %>%
  filter(Symbol == 'AMZN') %>%
  autoplot(Close) +
  labs(title = "Amazon Daily Closing Stock Price",
       y = "Price")

gafa_stock %>%
  filter(Symbol == "AMZN") %>%
  ACF(Close) %>%
  autoplot()

gafa_stock %>%
  filter(Symbol == "AMZN") %>%
  PACF(Close) %>%
  autoplot()

Exercise 9.3

For the following series, find an appropriate Box-Cox transformation and order of differencing in order to obtain stationary data.

Section a

Turkish GDP from global_economy.

turkey_gdp <- global_economy %>%
  filter(Country == 'Turkey')

lambda <- turkey_gdp %>%
  features(GDP, features = guerrero) %>%
  pull(lambda_guerrero)

turkey_gdp %>%
  autoplot(box_cox(GDP, lambda)) +
  labs(y = "",
       title = latex2exp::TeX(paste0(
         "Transformed Turkish GDP with $\\lambda$ = ",
         round(lambda,2))))

Appears to have an increasing trend after Box-Cox transformation. Attempt first differencing.

turkey_gdp <- turkey_gdp %>%
  mutate(diff_bc_gdp = difference(box_cox(GDP, lambda)))

# Display plot of first difference
turkey_gdp %>%
  autoplot(diff_bc_gdp) +
  labs(y = "",
       title = latex2exp::TeX(paste0(
         "Transformed Turkish GDP with $\\lambda$ = ",
         round(lambda,2))))

# Apply Ljung-Box test
turkey_gdp %>%
  features(diff_bc_gdp, ljung_box, lag = 10)
## # A tibble: 1 × 3
##   Country lb_stat lb_pvalue
##   <fct>     <dbl>     <dbl>
## 1 Turkey     5.84     0.829

Section b

Accommodation takings in the state of Tasmania from aus_accommodation.

Quarterly data

tas_takings <- aus_accommodation %>%
  filter(State == 'Tasmania')

lambda <- tas_takings %>%
  features(Takings, features = guerrero) %>%
  pull(lambda_guerrero)

tas_takings %>% autoplot(box_cox(Takings, lambda)) + labs(y = "“, title = latex2exp::TeX(paste0(”Transformed gas production with \(\\lambda\) = ", round(lambda,2))))

tas_takings %>%
  transmute(
    `Takings` = Takings,
    `Box-Cox Takings` = box_cox(Takings, lambda),
    `Annual change in Box-Cox Takings` = difference(box_cox(Takings, lambda), 4),
    `Doubly differenced Takings` =
                     difference(difference(box_cox(Takings, lambda), 4), 1)
  ) %>%
  pivot_longer(-Date, names_to="Type", values_to="Takings") %>%
  mutate(
    Type = factor(Type, levels = c(
      "Takings",
      "Box-Cox Takings",
      "Annual change in Box-Cox Takings",
      "Doubly differenced Takings"))
  ) %>%
  ggplot(aes(x = Date, y = Takings)) +
  geom_line() +
  facet_grid(vars(Type), scales = "free_y") +
  labs(title = "Tasmanian Accomodation Takings", y = NULL)

Definitely appears to have a seasonal component.

Appears the doubly differencing is necessary.

tas_takings %>%
  mutate(box_cox_takings = box_cox(Takings, lambda)) %>%
  features(box_cox_takings, unitroot_nsdiffs)
## # A tibble: 1 × 2
##   State    nsdiffs
##   <chr>      <int>
## 1 Tasmania       1
# Result is ...
#> # A tibble: 1 x 1
#>   nsdiffs
#>     <int>
#> 1       1

tas_takings %>%
  mutate(box_cox_takings = difference(box_cox(Takings, lambda), 4)) %>%
  features(box_cox_takings, unitroot_ndiffs)
## # A tibble: 1 × 2
##   State    ndiffs
##   <chr>     <int>
## 1 Tasmania      0
# Result is ...
#> # A tibble: 1 x 1
#>   ndiffs
#>    <int>
#> 1      0
# DO I REALLY WANT TO KEEP THIS?
# Apply Ljung-Box test
tas_takings %>%
  features(difference(difference(box_cox(Takings, lambda), 4), 1), ljung_box, lag = 12)
## # A tibble: 1 × 3
##   State    lb_stat lb_pvalue
##   <chr>      <dbl>     <dbl>
## 1 Tasmania    29.0   0.00398
## NOT THE RESULT I WANT, DOUBLE-CHECK ##
## unitroot_nsdiffs() from section 9.1 instead of ljung-box test

tas_takings %>%
  ACF(difference(difference(box_cox(Takings, lambda), 4), 1)) %>%
  autoplot()

Section c

Monthly sales from souvenirs.

lambda <- souvenirs %>%
  features(Sales, features = guerrero) %>%
  pull(lambda_guerrero)

souvenirs %>%
  autoplot(box_cox(Sales, lambda)) +
  labs(y = "",
       title = latex2exp::TeX(paste0(
         "Transformed gas production with $\\lambda$ = ",
         round(lambda,2))))

souvenirs %>%
  transmute(
    `Sales` = Sales,
    `Box-Cox sales` = box_cox(Sales, lambda),
    `Annual change in Box-Cox sales` = difference(box_cox(Sales, lambda), 12),
    `Doubly differenced Box-Cox sales` =
                     difference(difference(box_cox(Sales, lambda), 12), 1)
  ) %>%
  pivot_longer(-Month, names_to="Type", values_to="Sales") %>%
  mutate(
    Type = factor(Type, levels = c(
      "Sales",
      "Box-Cox sales",
      "Annual change in Box-Cox sales",
      "Doubly differenced Box-Cox sales"))
  ) %>%
  ggplot(aes(x = Month, y = Sales)) +
  geom_line() +
  facet_grid(vars(Type), scales = "free_y") +
  labs(title = "Souvenirs Sales", y = NULL)

Definitely appears to have a seasonal component.

Appears the double differencing is needed.

souvenirs %>%
  mutate(box_cox_sales = box_cox(Sales, lambda)) %>%
  features(box_cox_sales, unitroot_nsdiffs)
## # A tibble: 1 × 1
##   nsdiffs
##     <int>
## 1       1
# Result is ....
#> # A tibble: 1 x 1
#>   nsdiffs
#>     <int>
#> 1       1

# Now try differencing on the seasonal difference
souvenirs %>%
  mutate(box_cox_sales = difference(box_cox(Sales, lambda), 12)) %>%
  features(box_cox_sales, unitroot_ndiffs)
## # A tibble: 1 × 1
##   ndiffs
##    <int>
## 1      0
# Result is ...
#> # A tibble: 1 x 1
#>   ndiffs
#>    <int>
#> 1      0
# DO I REALLY WANT THIS??
# Apply Ljung-Box test
souvenirs %>%
  features(difference(difference(box_cox(Sales, lambda), 12), 1), ljung_box, lag = 12)
## # A tibble: 1 × 2
##   lb_stat lb_pvalue
##     <dbl>     <dbl>
## 1    26.2    0.0102
## NOT THE RESULT I WANT, DOUBLE-CHECK ##
## unitroot_nsdiffs() from section 9.1 instead of ljung-box test

Exercise 9.5

For your retail data (from Exercise 8 in Section 2.10), find the appropriate order of differencing (after transformation if necessary) to obtain stationary data.

set.seed(8675309)

# Montly data
myseries <- aus_retail %>%
  filter(`Series ID` == sample(aus_retail$`Series ID`,1))

myseries %>%
  autoplot(Turnover)

head(myseries)
## # A tsibble: 6 x 5 [1M]
## # Key:       State, Industry [1]
##   State      Industry               `Series ID`    Month Turnover
##   <chr>      <chr>                  <chr>          <mth>    <dbl>
## 1 Queensland Takeaway food services A3349718A   1982 Apr     26.7
## 2 Queensland Takeaway food services A3349718A   1982 May     27.3
## 3 Queensland Takeaway food services A3349718A   1982 Jun     26.2
## 4 Queensland Takeaway food services A3349718A   1982 Jul     25.2
## 5 Queensland Takeaway food services A3349718A   1982 Aug     25.6
## 6 Queensland Takeaway food services A3349718A   1982 Sep     26.7
# Box Cox
lambda <- myseries %>%
  features(Turnover, features = guerrero) %>%
  pull(lambda_guerrero)

myseries %>%
  autoplot(box_cox(Turnover, lambda)) +
  labs(y = "",
       title = latex2exp::TeX(paste0(
         "Transformed retail tunrover with $\\lambda$ = ",
         round(lambda,2))))

myseries %>%
  transmute(
    `Turnover` = Turnover,
    `Box-Cox turnover` = box_cox(Turnover, lambda),
    `Annual change in Box-Cox turnover` = difference(box_cox(Turnover, lambda), 12),
    `Doubly differenced Box-Cox turnover` =
                     difference(difference(box_cox(Turnover, lambda), 12), 1)
  ) %>%
  pivot_longer(-Month, names_to="Type", values_to="Turnover") %>%
  mutate(
    Type = factor(Type, levels = c(
      "Turnover",
      "Box-Cox turnover",
      "Annual change in Box-Cox turnover",
      "Doubly differenced Box-Cox turnover"))
  ) %>%
  ggplot(aes(x = Month, y = Turnover)) +
  geom_line() +
  facet_grid(vars(Type), scales = "free_y") +
  labs(title = "Retail Turnover", y = NULL)

# nsdiff
myseries %>%
  mutate(box_cox_turnover = box_cox(Turnover, lambda)) %>%
  features(box_cox_turnover, unitroot_nsdiffs)
## # A tibble: 1 × 3
##   State      Industry               nsdiffs
##   <chr>      <chr>                    <int>
## 1 Queensland Takeaway food services       1
# Result is ....
#> # A tibble: 1 x 1
#>   nsdiffs
#>     <int>
#> 1       1

# ndiff
# Now try differencing on the seasonal difference
myseries %>%
  mutate(box_cox_turnover = difference(box_cox(Turnover, lambda), 12)) %>%
  features(box_cox_turnover, unitroot_ndiffs)
## # A tibble: 1 × 3
##   State      Industry               ndiffs
##   <chr>      <chr>                   <int>
## 1 Queensland Takeaway food services      1
# Result is ... also 1
#> # A tibble: 1 x 1
#>   ndiffs
#>    <int>
#> 1      1

Exercise 9.6

Simulate and plot some data from simple ARIMA models.

Section a

Use the following R code to generate data from an AR(1) model with
\(\phi_1 = 0.6\) and \(\sigma^2 = 1\). The process starts with \(y_1 = 0\).

y <- numeric(100)
e <- rnorm(100)

for(i in 2:100)
  y[i] <- 0.6*y[i-1] + e[i]

sim <- tsibble(idx = seq_len(100), y = y, index = idx)

Section b

Produce a time plot for the series. How does the plot change as you change \(\phi_1\)?

sim %>% autoplot(y)

# phi=0.2
for(i in 2:100)
  y[i] <- 0.2*y[i-1] + e[i]
sim02 <- tsibble(idx = seq_len(100), y = y, index = idx)

# phi=1.0
for(i in 2:100)
  y[i] <- 1.0*y[i-1] + e[i]
sim10 <- tsibble(idx = seq_len(100), y = y, index = idx)

sim02 %>% autoplot(y)

sim10 %>% autoplot(y)

Lower the \(\phi_1\), the more oscillations occur, the higher the fewer oscillations

Section c

Write your own code to generate data from an MA(1) model with \(\theta_1 = 0.6\) and \(\sigma^2 = 1\).

\[MA(1)\ is\ y_t = c + \epsilon_t + \theta_1\epsilon_{t-1}\]

y <- numeric(100)
e <- rnorm(100)

for(i in 2:100)
  y[i] <- e[i] + 0.6*e[i-1]

sim_ma1 <- tsibble(idx = seq_len(100), y = y, index = idx)

#head(sim_ma1)

Section d

Produce a time plot for the series. How does the plot change as you change \(\theta_1\)?

sim_ma1 %>% autoplot(y)

# theta is 0.2
for(i in 2:100)
  y[i] <- e[i] + 0.2*e[i-1]
sim_ma02 <- tsibble(idx = seq_len(100), y = y, index = idx)

# theta is 1.0
for(i in 2:100)
  y[i] <- e[i] + 1.0*e[i-1]
sim_ma10 <- tsibble(idx = seq_len(100), y = y, index = idx)

sim_ma02 %>% autoplot(y)

sim_ma10 %>% autoplot(y)

Section e

Generate data from an ARMA(1,1) model with \(\phi_1 = 0.6,\) \(\theta_1 = 0.6\) and \(\sigma^2 = 1\).

y <- numeric(100)
e <- rnorm(100)

phi <- 0.6
theta <- 0.6

for(i in 2:100)
  y[i] <- phi*y[i-1] + theta*e[i-1] + e[i]

sim_arma11 <- tsibble(idx = seq_len(100), y = y, index = idx)

Section f

Generate data from an AR(2) model with \(\phi_1 = -0.8\), \(\phi_2 = 0.3\) and \(\sigma^2 = 1\). (Note that these parameters will give a non-stationary series.)

\[AR(2)\ is \ y_t = c + \phi_1y_{t-1} + \phi_2y_{t-2} + \epsilon_t\]

y <- numeric(100)
e <- rnorm(100)

for(i in 3:100)
  y[i] <- -0.8*y[i-1] + 0.3*y[i-2] + e[i]

sim_ar2 <- tsibble(idx = seq_len(100), y = y, index = idx)

Section g

Graph the latter two series and compare them.

sim_arma11 %>% autoplot(y)

sim_ar2 %>% autoplot(y)

Exercise 9.7

Consider aus_airpassengers, the total number of passengers (in millions) from Australian air carriers for the period 1970-2011.

aus_airpassengers %>% head()
## # A tsibble: 6 x 2 [1Y]
##    Year Passengers
##   <dbl>      <dbl>
## 1  1970       7.32
## 2  1971       7.33
## 3  1972       7.80
## 4  1973       9.38
## 5  1974      10.7 
## 6  1975      11.1
# Year      Passengers

Section a

Use ARIMA() to find an appropriate ARIMA model. What model was selected. Check that the residuals look like white noise. Plot forecasts for the next 10 periods.

# Generate ARIMA model
aus_air_mod <- aus_airpassengers %>% model(ARIMA(Passengers))

Display model definition

# Output report to show model selected
report(aus_air_mod)
## Series: Passengers 
## Model: ARIMA(0,2,1) 
## 
## Coefficients:
##           ma1
##       -0.8963
## s.e.   0.0594
## 
## sigma^2 estimated as 4.308:  log likelihood=-97.02
## AIC=198.04   AICc=198.32   BIC=201.65

Check the residuals look like white noise

aus_air_mod %>% gg_tsresiduals()

Plot forecasts for the next 10 periods

aus_air_fc <- aus_air_mod %>% forecast(h=10)

aus_air_fc
## # A fable: 10 x 4 [1Y]
## # Key:     .model [1]
##    .model             Year Passengers .mean
##    <chr>             <dbl>     <dist> <dbl>
##  1 ARIMA(Passengers)  2017 N(75, 4.3)  74.8
##  2 ARIMA(Passengers)  2018 N(77, 9.6)  77.0
##  3 ARIMA(Passengers)  2019  N(79, 16)  79.2
##  4 ARIMA(Passengers)  2020  N(81, 23)  81.3
##  5 ARIMA(Passengers)  2021  N(84, 32)  83.5
##  6 ARIMA(Passengers)  2022  N(86, 42)  85.7
##  7 ARIMA(Passengers)  2023  N(88, 53)  87.9
##  8 ARIMA(Passengers)  2024  N(90, 66)  90.1
##  9 ARIMA(Passengers)  2025  N(92, 80)  92.3
## 10 ARIMA(Passengers)  2026  N(94, 97)  94.5

Section b

Write the model in terms of the backshift operator.

Model: ARIMA(0,2,1)

\[(1-B)^2y_t = c + (1+\theta_1B)\epsilon_t\]

Random work, probably delete \[B(By_t) = B^2y_t = y_{t-2}\]

Section c

Plot forecasts from an ARIMA(0,1,0) model with drift and compare these to part a.

# Apparently drift just gets applied
aus_air_arima010 <- aus_airpassengers %>%
  model(ARIMA(Passengers ~ pdq(0,1,0)))

report(aus_air_arima010)
## Series: Passengers 
## Model: ARIMA(0,1,0) w/ drift 
## 
## Coefficients:
##       constant
##         1.4191
## s.e.    0.3014
## 
## sigma^2 estimated as 4.271:  log likelihood=-98.16
## AIC=200.31   AICc=200.59   BIC=203.97

Section d

Plot forecasts from an ARIMA(2,1,2) model with drift and compare these to parts a and c. Remove the constant and see what happens.

aus_air_arima212 <- aus_airpassengers %>%
  model(ARIMA(Passengers ~ pdq(1,1,2)))
report(aus_air_arima212)
## Series: Passengers 
## Model: ARIMA(1,1,2) w/ drift 
## 
## Coefficients:
##          ar1      ma1     ma2  constant
##       0.9240  -0.9828  0.1323    0.1083
## s.e.  0.1155   0.1920  0.1479    0.0384
## 
## sigma^2 estimated as 4.388:  log likelihood=-97.28
## AIC=204.57   AICc=206.07   BIC=213.71
# 212 produces NULL model
# 211 works though
# 112 works, too

Section e

Plot forecasts from an ARIMA(0,2,1) model with a constant. What happens?

aus_air_arima021 <- aus_airpassengers %>%
  model(ARIMA(Passengers ~ pdq(0,2,1)))

report(aus_air_arima021)
## Series: Passengers 
## Model: ARIMA(0,2,1) 
## 
## Coefficients:
##           ma1
##       -0.8963
## s.e.   0.0594
## 
## sigma^2 estimated as 4.308:  log likelihood=-97.02
## AIC=198.04   AICc=198.32   BIC=201.65

Exercise 9.8

For the United States GDP series (from global_economy):

us_gdp <- global_economy %>%
  filter(Country == "United States") %>%
  mutate(GDP = GDP/1e9) # GDP in billions
  
head(us_gdp)
## # A tsibble: 6 x 9 [1Y]
## # Key:       Country [1]
##   Country       Code   Year   GDP Growth   CPI Imports Exports Population
##   <fct>         <fct> <dbl> <dbl>  <dbl> <dbl>   <dbl>   <dbl>      <dbl>
## 1 United States USA    1960  543.  NA     13.6    4.20    4.97  180671000
## 2 United States USA    1961  563.   2.30  13.7    4.03    4.90  183691000
## 3 United States USA    1962  605.   6.10  13.9    4.13    4.81  186538000
## 4 United States USA    1963  639.   4.40  14.0    4.09    4.87  189242000
## 5 United States USA    1964  686.   5.80  14.2    4.10    5.10  191889000
## 6 United States USA    1965  744.   6.40  14.4    4.24    4.99  194303000
us_gdp %>% autoplot(GDP)

Section a

if necessary, find a suitable Box-Cox transformation for the data;

# Box Cox
lambda <- us_gdp %>%
  features(GDP, features = guerrero) %>%
  pull(lambda_guerrero)

us_gdp %>%
  autoplot(box_cox(GDP, lambda)) +
  labs(y = "",
       title = latex2exp::TeX(paste0(
         "Transformed US GDP with $\\lambda$ = ",
         round(lambda,2))))

us_gdp <- us_gdp %>%
  mutate(GDP_T = box_cox(GDP, lambda))

us_gdp <- us_gdp %>%
  mutate(Diff = difference(GDP_T))

head(us_gdp)
## # A tsibble: 6 x 11 [1Y]
## # Key:       Country [1]
##   Country Code   Year   GDP Growth   CPI Imports Exports Population GDP_T   Diff
##   <fct>   <fct> <dbl> <dbl>  <dbl> <dbl>   <dbl>   <dbl>      <dbl> <dbl>  <dbl>
## 1 United… USA    1960  543.  NA     13.6    4.20    4.97  180671000  17.4 NA    
## 2 United… USA    1961  563.   2.30  13.7    4.03    4.90  183691000  17.6  0.215
## 3 United… USA    1962  605.   6.10  13.9    4.13    4.81  186538000  18.0  0.431
## 4 United… USA    1963  639.   4.40  14.0    4.09    4.87  189242000  18.4  0.330
## 5 United… USA    1964  686.   5.80  14.2    4.10    5.10  191889000  18.8  0.445
## 6 United… USA    1965  744.   6.40  14.4    4.24    4.99  194303000  19.3  0.517
us_gdp %>%
  ACF(Diff) %>%
  autoplot()

us_gdp %>%
  PACF(Diff) %>%
  autoplot()

Results in \(\lambda = 0.28\).

Section b

fit a suitable ARIMA model to the transformed data using ARIMA();

us_gdp_mod <- us_gdp %>% model(ARIMA(GDP_T))

report(us_gdp_mod)
## Series: GDP_T 
## Model: ARIMA(1,1,0) w/ drift 
## 
## Coefficients:
##          ar1  constant
##       0.4586    0.3428
## s.e.  0.1198    0.0276
## 
## sigma^2 estimated as 0.0461:  log likelihood=7.72
## AIC=-9.43   AICc=-8.98   BIC=-3.3

Section c

try some other plausible models by experimenting with the orders chosen;

us_gdp_mod111 <- us_gdp %>%
  model(ARIMA(GDP_T ~ pdq(1,1,1)))

us_gdp_mod211 <- us_gdp %>%
  model(ARIMA(GDP_T ~ pdq(2,1,1)))

us_gdp_mod112 <- us_gdp %>%
  model(ARIMA(GDP_T ~ pdq(1,1,2)))

us_gdp_mod101 <- us_gdp %>%
  model(ARIMA(GDP_T ~ pdq(1,0,1)))

Section d

choose what you think is the best model and check the residual diagnostics;

Section e

produce forecasts of your fitted model. Do the forecasts look reasonable?

Section f

compare the results with what you would obtain using ETS() (with no transformation).